@comment{su2021stvgbert: ICCV 2021 conference paper introducing STVGBert.
  The model name and venue acronyms are brace-protected so that
  sentence-casing styles keep their capitalisation; the month uses the
  predefined macro and the page range uses an en-dash (double hyphen).}
@InProceedings{su2021stvgbert,
    author    = {Su, Rui and Yu, Qian and Xu, Dong},
    title     = {{STVGBert}: A Visual-Linguistic Transformer Based Framework for Spatio-Temporal Video Grounding},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2021},
    pages     = {1533--1542}
}